* Interpret this do-file under Stata 18.0 syntax and behavior.
version 18.0
* Use 8 processors so that results replicate across machines with different core counts.
* NOTE(review): presumably requires a Stata/MP installation that allows 8 cores -- confirm.
set processors 8
* Start from a clean slate: clear data and other objects in memory, then drop all macros.
clear all
macro drop _all
* Seed the random-number generator (each telasso call below also passes its own rseed()).
set seed 20220909
/* 
Notes: 
1) "set sortseed #" must be placed immediately before EACH "telasso" command (rather than once at the top of the do-file) to ensure that the same number of selected controls is reported on every run.
2) For a detailed explanation, see the email chain with Stata technical support dated 10/10/2023. 
*/

* Run metadata: program name, author, creation date, and the date of this run.
local pgm  "dst-Table_4_Panel_A_depression_telasso_drop_cities"      // file name
local who  "Muzhe Yang"                                              // author
local dte  "2025-01-20"                                              // created date
local dte2 "`c(current_date)'"                                       // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

* Close any log left open by a previous run (capture: no error if none is open),
* then open a fresh text log named after this program and stamp it with the tag.
* Forward slashes are used as directory separators: they work on Windows, macOS,
* and Linux alike, and they avoid the backslash-before-macro escaping problem
* that a Windows-style path would otherwise need a doubled backslash to dodge.
capture log close
log using "code/analysis/tables/`pgm'.txt", text replace
display "`tag'"

**# data prep ------------------------------------------------------------------------------------

* Load the estimation data set. The path uses forward slashes so that the do-file
* runs unchanged on Windows, macOS, and Linux.
use "data_clean/dst-data04_for_estimation_within_250_miles", clear

* Quick diagnostics on the full file before restricting the sample.
summ dist_to_border
tab wave, missing 
des depression

* Keep only observations from wave 2, then inspect the tract identifier.
keep if wave == 2
codebook TractFIPS

*## (1) create the 9 cells, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

* Both inputs to the cell definition must be non-missing.
assert !missing(centroid_lat)
assert !missing(region)

* Cells are the cross of three border regions and three latitude bands:
*   band 1: latitude > 40;  band 2: 34 < latitude <= 40;  band 3: latitude <= 34.
* Observations in any other region keep cell == . (missing).

* Eastern/Central border: cells 1-3
generate cell = 1 if region == "eastern and central"  & centroid_lat > 40
replace  cell = 2 if region == "eastern and central"  & (centroid_lat > 34 & centroid_lat <= 40)
replace  cell = 3 if region == "eastern and central"  & centroid_lat <= 34

* Central/Mountain border: cells 4-6
replace  cell = 4 if region == "central and mountain" & centroid_lat > 40
replace  cell = 5 if region == "central and mountain" & (centroid_lat > 34 & centroid_lat <= 40)
replace  cell = 6 if region == "central and mountain" & centroid_lat <= 34

* Mountain/Pacific border: cells 7-9
replace  cell = 7 if region == "mountain and pacific" & centroid_lat > 40
replace  cell = 8 if region == "mountain and pacific" & (centroid_lat > 34 & centroid_lat <= 40)
replace  cell = 9 if region == "mountain and pacific" & centroid_lat <= 34

* Sanity checks: cell by time zone, and the latitude range within each band.
tabulate cell time_zone, missing
summarize centroid_lat if inlist(cell, 1, 4, 7)
summarize centroid_lat if inlist(cell, 2, 5, 8)
summarize centroid_lat if inlist(cell, 3, 6, 9)

*## (2) control variables

* $x: tract-level covariates.
global x "pop white_pct black_pct hispanic_pct pop_under_18yr_pct pop_65yr_over_pct age_median educ_hs_pct educ_coll_pct married_pct hh_size hh_income_median home_value_median no_health_ins_pct unemploy_pct"

* Full list of continuous covariates: $x plus distance to the border, latitude,
* and daylight_dur_max. $x is expanded here, exactly as it is when used inline.
local covars "$x dist_to_border centroid_lat daylight_dur_max"

* $cvars: every continuous covariate, plus all pairwise interactions and squares.
global cvars c.(`covars')##c.(`covars')

* $controls: $cvars, cell indicators, and cell-by-covariate interactions.
global controls $cvars i.cell i.cell#c.(`covars')

* Inspect the covariates that feed the control set.
describe  `covars'
summarize `covars'

**# estimation prep -------------------------------------------------

* Numeric county identifier, used as the cluster variable in vce(cluster ...).
encode CountyFIPS, generate(county_fips)

* nomiss counts, per observation, how many of the covariates are missing
* (so nomiss == 0 means the observation is complete on all of them).
egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)

* Indicators for the four counties that contain large cities.
assert !missing(StateAbbr)
assert !missing(CountyName)
generate chicago    = (StateAbbr == "IL" & CountyName == "Cook")
generate milwaukee  = (StateAbbr == "WI" & CountyName == "Milwaukee")
generate louisville = (StateAbbr == "KY" & CountyName == "Jefferson")
generate atlanta    = (StateAbbr == "GA" & CountyName == "Fulton")

* Report where each of these counties sits before any sample restriction.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the full sample"
    summarize dist_to_border centroid_lat if `city' == 1
    display ""
}

* Estimation sample: complete covariates, not in Arizona, and within `radius'
* of the border (same units as dist_to_border).
local radius = 50
generate sample_selection = (nomiss == 0 & StateAbbr != "AZ" & dist_to_border <= `radius')

* Options shared by every telasso call below.
global xfolds_resample "xfolds(10) resample(3) nolog"

**# table ---------------------------------------------------------------------------------

* Column (1): all tracts in the estimation sample in the "eastern and central" region.
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central", ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m1

* Scalars picked up by etable: control counts from e(), plus string flags that
* record whether each large-city county is part of this column's sample.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Included"
scalar city_chicago    = "Included"
scalar city_louisville = "Included"
scalar city_atlanta    = "Included"

* Start the table (replace): treatment effect with its standard error, followed by
* the sample size, the control counts, and the four county inclusion flags.
quietly etable, replace ///
    column(index) ///
    keep(r1vs0.treat) ///
    cstat(_r_b,  nformat(%9.3f)) ///
    cstat(_r_se, nformat(%9.3f)) ///
    mstat(N, nformat(%9.0fc) label("Number of observations")) ///
    mstat(k_controls, nformat(%9.0fc) label("Number of potential predictor variables")) ///
    mstat(k_controls_sel, nformat(%9.0fc) label("Number of selected predictor variables")) ///
    mstat(case_city_milwaukee  = city_milwaukee,  label("Milwaukee County, WI, which includes the city of Milwaukee")) ///
    mstat(case_city_chicago    = city_chicago,    label("Cook County, IL, which includes the city of Chicago")) ///
    mstat(case_city_louisville = city_louisville, label("Jefferson County, KY, which includes the city of Louisville")) ///
    mstat(case_city_atlanta    = city_atlanta,    label("Fulton County, GA, which includes the city of Atlanta")) ///
    stars(0.10 "*" 0.05 "**" 0.01 "***", attach(_r_b) decreasing pvname("p-value") nformat(%9.2f)) showstars showstarsnote ///
    novarlabel nocenter

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (2): northern part (centroid latitude > 40), all counties kept.
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & (centroid_lat > 40 & !missing(centroid_lat)), ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m2

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Included"
scalar city_chicago    = "Included"
scalar city_louisville = "Excluded"
scalar city_atlanta    = "Excluded"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (3): northern part (centroid latitude > 40), dropping Cook County, IL (Chicago).
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & (centroid_lat > 40 & !missing(centroid_lat)) ///
     & chicago == 0, ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m3

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Included"
scalar city_chicago    = "Excluded"
scalar city_louisville = "Excluded"
scalar city_atlanta    = "Excluded"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (4): northern part (centroid latitude > 40), dropping Cook County, IL (Chicago)
* and Milwaukee County, WI (Milwaukee).
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & (centroid_lat > 40 & !missing(centroid_lat)) ///
     & chicago == 0 & milwaukee == 0, ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m4

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Excluded"
scalar city_chicago    = "Excluded"
scalar city_louisville = "Excluded"
scalar city_atlanta    = "Excluded"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (5): southern part (centroid latitude <= 40), all counties kept.
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & centroid_lat <= 40, ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m5

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Excluded"
scalar city_chicago    = "Excluded"
scalar city_louisville = "Included"
scalar city_atlanta    = "Included"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (6): southern part (centroid latitude <= 40), dropping Jefferson County, KY (Louisville).
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & centroid_lat <= 40 ///
     & louisville == 0, ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m6

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Excluded"
scalar city_chicago    = "Excluded"
scalar city_louisville = "Excluded"
scalar city_atlanta    = "Included"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Column (7): southern part (centroid latitude <= 40), dropping Jefferson County, KY (Louisville)
* and Fulton County, GA (Atlanta).
* set sortseed must stay immediately before telasso (see the notes at the top of the file).
set sortseed 12102023
telasso (depression $controls) (treat $controls) ///
    if sample_selection == 1 & region == "eastern and central" ///
     & centroid_lat <= 40 ///
     & louisville == 0 & atlanta == 0, ///
    vce(cluster county_fips) $xfolds_resample rseed(10101)
estimates store m7

* Refresh the scalars for this column, then append the column to the table.
scalar k_controls      = e(k_controls)
scalar k_controls_sel  = e(k_controls_sel)
scalar city_milwaukee  = "Excluded"
scalar city_chicago    = "Excluded"
scalar city_louisville = "Excluded"
scalar city_atlanta    = "Excluded"
quietly etable, append

* Check which large-city counties actually ended up in the estimation sample.
foreach city in chicago milwaukee louisville atlanta {
    display "`city' in the ET-CT estimation sample"
    summarize dist_to_border centroid_lat if `city' == 1 & e(sample) == 1
    display ""
}

* Post-process the collection built by the etable calls above: set the title,
* group the seven columns under three sample headers, label everything, and
* style the cells. The statements below are order-dependent; do not reorder.

* Table title.
collect title "Table: The Impact of Circadian Misalignment Estimated by the Double/Debiased ML, Dropping Counties That Include Large Cities" 

* NOTE(review): bare "collect layout" presumably just redisplays the current layout -- confirm.
collect layout 
* Tag the collected items with top_row[1] so that a single spanning header row
* (labelled below) can sit above all columns.
collect addtags top_row[1]
* Group the seven models into three column groups:
* model 1 -> column1, models 2-4 -> column2, models 5-7 -> column3.
collect recode etable_estimates 1 = column1 ///
                                2 = column2 3 = column2 4 = column2 ///
                                5 = column3 6 = column3 7 = column3
* Rows: the treatment coefficient and its standard error, then the model-level
* statistics. Columns: top header # column group # model # stars.
collect layout (colname[r1vs0.treat]#result[_r_b _r_se] result[N k_controls k_controls_sel case_city_milwaukee case_city_chicago case_city_louisville case_city_atlanta]) ///
               (top_row[1]#etable_estimates#cmdset#stars)

* Header text for the spanning top row.
collect label levels top_row ///
        1 "Treat = 1 for census tracts located east of the time zone border; Treat = 0 for census tracts located west of the time zone border", modify
* Header text for the three column groups created by the recode above.
collect label levels etable_estimates ///
        column1 "All census tracts in the Eastern Time (ET) and Central Time (CT) zones" ///
        column2 "Census tracts in the northern part of the ET and CT zones, defined by the latitude (in degrees) of the centroid of a census tract > 40" ///
        column3 "Census tracts in the southern part of the ET and CT zones, defined by the latitude (in degrees) of the centroid of a census tract ≤ 40", modify 
* Column numbers (1)-(7) for the individual models.
collect label levels cmdset ///
        1 "(1)" 2 "(2)" 3 "(3)" 4 "(4)" 5 "(5)" 6 "(6)" 7 "(7)", modify
* Row label for the treatment-effect coefficient.
collect label levels colname ///
        r1vs0.treat "Treat (1/0)", modify 

* Show level labels in the headers of these four dimensions.
collect style header top_row, level(label)
collect style header etable_estimates, level(label)
collect style header cmdset, level(label)
collect style header colname, level(label)
* NOTE(review): sformat(%s) presumably resets any string decoration on these results to the bare value -- confirm.
collect style cell result[_r_b N k_controls k_controls_sel], sformat(%s)
* Double rules: above the corner and column-header blocks, below the row-header and item blocks.
collect style cell border_block[corner], border(top, pattern(double))
collect style cell border_block[column-header], border(top, pattern(double))
collect style cell border_block[row-header], border(bottom, pattern(double))
collect style cell border_block[item], border(bottom, pattern(double))

* Write the finished table to an Excel workbook named after this program.
* Forward slashes are used as directory separators: they work on Windows, macOS,
* and Linux alike, and they avoid the backslash-before-macro escaping problem.
collect export "code/analysis/tables/`pgm'.xlsx", replace

* Close the log opened at the top of the file and stop executing the do-file.
log close
exit